#!/usr/bin/Rscript
##########################################################################################
# Issue Ownership and Agenda Setting in the 2019 Swiss National Elections
##########################################################################################
# Description:
# Remove unused Variables from Original Data
##########################################################################################
# Content
##########################################################################################
# 1) Dependencies
# 2) Load Data
# 3) Reduce Data
# 4) Save minified Data
##########################################################################################
# 1) Dependencies
##########################################################################################
library(dplyr)
library(tidyverse)
library(data.table)
library(readr)
library(lubridate)
library(purrr)
library(magrittr)
##########################################################################################
# 2) Load Data
##########################################################################################
# Remove every (non-hidden) object from the current environment before the
# data is loaded.
rm(list = ls())

# - set dir
# All data paths further down are relative ("../Data/..."), so the working
# directory is set to the directory that contains this script.
args <- commandArgs()

# When started through Rscript, the script location is passed as a
# "--file=<path>" command line argument.
scriptName <- args[startsWith(args, "--file=")]

if (length(scriptName) == 0) {
  # No "--file=" argument present: ask RStudio for the path of the file in
  # the source editor instead (requires the rstudioapi package).
  scriptName <- rstudioapi::getSourceEditorContext()$path
} else {
  # Strip the 7-character "--file=" prefix so only the path remains.
  scriptName <- substr(scriptName, 8, nchar(scriptName))
}

# dirname() handles both "/" and "\\" separators and yields "." for a bare
# file name (e.g. `Rscript script.R`), where splitting the path by hand
# would produce NA and make setwd() fail. pathName holds the directory
# without a trailing separator.
pathName <- dirname(scriptName)

setwd(pathName)
parent_path <- getwd()

# - Load Data
# Read the classified SMD tables for 2018 and 2019; the "tx" column is
# discarded directly after each file is read.
smd18 <- select(
  read_rds("../Data/smd_db_classified_2018.RDS"),
  -c("tx")
)
# Top-level call: shows the remaining column names when the script runs.
names(smd18)

smd19 <- select(
  read_rds("../Data/smd_db_classified_2019.RDS"),
  -c("tx")
)
names(smd19)

# Load the curated Twitter table and drop a first batch of unused columns
# (index/meta fields, raw text, coordinates and URL fields) in one step.
tweets <- select(
  read_rds("../Data/Tweets_2018_2019_curated_sentiment_class.RDS"),
  -c(
    X_index, X_type, X_id, X_score, X_ignored,
    text,
    co, co.1, co.2,
    Geo_coords, Coords_coords, Bbox_coords,
    Urls_expanded_url, Urls_url,
    Profile_background_url, Profile_banner_url, Profile_image_url,
    Profile_expanded_url,
    Place_url,
    Ext_media_expanded_url, Media_expanded_url,
    Display_text_width
  )
)

# Trigger a garbage collection after the columns have been dropped:
gc()

# Drop further columns that are not needed; the list differs per year.
smd18 <- select(smd18, -c("X_id", "X_type", "X_index", "nz", "sm"))
smd19 <- select(
  smd19,
  -c("X_id", "X_type", "X_index", "url", "annotation_geography")
)

# Stack both years into a single table.
# NOTE(review): rbind() presumably relies on both tables having matching
# columns once the year-specific columns above are removed - confirm.
smd <- rbind(smd18, smd19)

# The per-year tables are no longer needed once combined.
rm(list = c("smd18", "smd19"))

# Read the complete press release table (file covers 2000-2019) ...
press <- read_rds("../Data/Pressreleases_classified_2000-2019.RDS")

# ... keep only rows dated after 2017-12-31 ...
# NOTE(review): assumes Datum is of class Date so the comparison with
# as.Date() is like-for-like - confirm.
press <- filter(press, Datum > as.Date("2017-12-31"))

# ... drop the index/meta columns and expose the date as pubDateTime.
press <- select(press, -c("X_index", "X_type", "X_id", "X_score"))
press <- rename(press, pubDateTime = Datum)

# Top-level call: shows the resulting column names when the script runs.
names(press)

# Remove Text:
# For each table the current column names are shown first, then a further
# set of columns is dropped.
names(smd)
smd <- select(smd, -c("ru", "ut", "ht", "annotation_person"))

names(press)
press <- select(press, -c("tx", "url"))

names(tweets)
tweets <- select(
  tweets,
  -c(
    "Text", "Symbols", "Source",
    "Retweet_text", "Retweet_source", "Retweet_location",
    "Retweet_description",
    "Quoted_text", "Quoted_source", "Quoted_location",
    "Quoted_description",
    "Media_url", "Media_type",
    "Location", "Country_code", "codec",
    "Ext_media_url", "Ext_media_type"
  )
)

# Trigger another garbage collection after the columns have been dropped:
gc()
##########################################################################################
# 4) Save minified Data
##########################################################################################
# Write each reduced table to its own RDS file next to the input data.
smd %>%
  write_rds("../Data/smd_minified_2018-2019.RDS")
press %>%
  write_rds("../Data/pressreleases_2018-2019.RDS")
tweets %>%
  write_rds("../Data/Tweets_minified_2018_2019_curated_sentiment_class.RDS")
##########################################################################################

